import os

import utils_file_local
def concat_tsv(input_dir_1, input_dir_2, output_dir, split='train'):
    """Merge the ``<split>.tsv`` files of two directories into one file.

    Both inputs are read with ``utils_file_local.load_dict_from_tsv`` (called
    with ``is_relpath=False``), the second mapping is merged into the first,
    and the result is written to ``<output_dir>/<split>.tsv`` with
    ``utils_file_local.save_dict_to_tsv``.

    Args:
        input_dir_1: Directory containing the first ``<split>.tsv``.
        input_dir_2: Directory containing the second ``<split>.tsv``.
        output_dir: Directory that receives the merged ``<split>.tsv``.
        split: Base name of the file, without the ``.tsv`` extension.

    NOTE(review): the merge uses ``update``, so if the loader returns a plain
    dict, a key present in both inputs keeps only the value from
    ``input_dir_2`` -- confirm this is intended. Whether ``output_dir`` must
    already exist depends on ``save_dict_to_tsv`` and is not visible here.
    """
    tsv_name = split + ".tsv"
    merged = utils_file_local.load_dict_from_tsv(
        os.path.join(input_dir_1, tsv_name), is_relpath=False
    )
    extra = utils_file_local.load_dict_from_tsv(
        os.path.join(input_dir_2, tsv_name), is_relpath=False
    )
    destination = os.path.join(output_dir, tsv_name)
    # Merge in place: the mapping loaded from input_dir_1 becomes the result.
    merged.update(extra)
    utils_file_local.save_dict_to_tsv(merged, destination)

def concat_km_label(input_dir_1, input_dir_2, output_dir, split='train'):
    """Concatenate the ``<split>.km`` files of two directories into one file.

    Each input is read with ``utils_file_local.load_list_file_clean``; the
    list from ``input_dir_1`` is followed by the list from ``input_dir_2``
    and the combined list is written to ``<output_dir>/<split>.km`` with
    ``utils_file_local.write_list_to_file``.

    Args:
        input_dir_1: Directory containing the first ``<split>.km``.
        input_dir_2: Directory containing the second ``<split>.km``.
        output_dir: Directory that receives the combined ``<split>.km``.
        split: Base name of the file, without the ``.km`` extension.
    """
    km_name = split + ".km"
    first_labels = utils_file_local.load_list_file_clean(
        os.path.join(input_dir_1, km_name)
    )
    second_labels = utils_file_local.load_list_file_clean(
        os.path.join(input_dir_2, km_name)
    )
    destination = os.path.join(output_dir, km_name)
    # Order matters: entries from input_dir_1 come first in the output.
    combined = first_labels + second_labels
    utils_file_local.write_list_to_file(combined, destination)




if __name__ == "__main__":
    # Merge the default ("train") split of the chat_1500 and chat_3500
    # manifest directories into chat_1500_3500.
    concat_tsv(
        input_dir_1="/home/node27_tmpdata/xlgeng/pachong_10W_data/fairseq_data/manifest/chat_1500",
        input_dir_2="/home/node27_tmpdata/xlgeng/pachong_10W_data/fairseq_data/manifest/chat_3500",
        output_dir="/home/node27_tmpdata/xlgeng/pachong_10W_data/fairseq_data/manifest/chat_1500_3500",
    )

    # Earlier invocations (chat_1000 + ximalaya_redian_2T, train and valid
    # splits, for both the .tsv and the .km files), kept for reference.
    # concat_tsv("/home/node27_tmpdata/xlgeng/pachong_10W_data/fairseq_data/manifest/chat_1000", "/home/node27_tmpdata/xlgeng/pachong_10W_data/fairseq_data/manifest/ximalaya_redian_2T","/home/node27_tmpdata/xlgeng/pachong_10W_data/fairseq_data/manifest/ximalaya_redian_2T_chat_1000")
    # concat_km_label("/home/node27_tmpdata/xlgeng/pachong_10W_data/fairseq_data/hubert_feat/chat_1000/k-means/hubert_large_by_xlgeng_extract_layer_9",
    #                 "/home/node27_tmpdata/xlgeng/pachong_10W_data/fairseq_data/hubert_feat/ximalaya_redian_2T/k-means/hubert_large_by_xlgeng_extract_layer_9",
    #                 "/home/node27_tmpdata/xlgeng/pachong_10W_data/fairseq_data/hubert_feat/ximalaya_redian_2T_chat_1000/k-means/hubert_large_by_xlgeng_extract_layer_9")
    # concat_tsv("/home/node27_tmpdata/xlgeng/pachong_10W_data/fairseq_data/manifest/chat_1000",
    #            "/home/node27_tmpdata/xlgeng/pachong_10W_data/fairseq_data/manifest/ximalaya_redian_2T",
    #            "/home/node27_tmpdata/xlgeng/pachong_10W_data/fairseq_data/manifest/ximalaya_redian_2T_chat_1000",
    #            split='valid')
    # concat_km_label(
    #     "/home/node27_tmpdata/xlgeng/pachong_10W_data/fairseq_data/hubert_feat/chat_1000/k-means/hubert_large_by_xlgeng_extract_layer_9",
    #     "/home/node27_tmpdata/xlgeng/pachong_10W_data/fairseq_data/hubert_feat/ximalaya_redian_2T/k-means/hubert_large_by_xlgeng_extract_layer_9",
    #     "/home/node27_tmpdata/xlgeng/pachong_10W_data/fairseq_data/hubert_feat/ximalaya_redian_2T_chat_1000/k-means/hubert_large_by_xlgeng_extract_layer_9",
    #     split='valid')